/**
* Licensed to WibiData, Inc. under one or more contributor license
* agreements. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. WibiData, Inc.
* licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.kiji.maven.plugins.hbase;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.maven.artifact.Artifact;
import org.apache.maven.artifact.DependencyResolutionRequiredException;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;
/**
* A maven goal that starts a mini HBase cluster in a new daemon thread.
*
* <p>A new daemon thread is created that starts a mini HBase cluster. The main thread
 * blocks until the HBase cluster has fully started. The daemon thread with the
* in-process HBase cluster will continue to run in the background until stopped by the
* 'stop' goal of the plugin.</p>
*
* <p>The configuration of the started mini HBase cluster will be written to a
* hbase-site.xml file in the test classpath ('${basedir}/target/test-classes' by
* default). The path to the generated configuration file may be customized with the
* 'hbaseSiteFile' property</p>
*
* <p>A configuration index can be written by this goal. The configuration index is a file that
* contains one line for each configuration file written by this goal, where the line contains the
* path to the configuration file. By default, the goal does not write a configuration index.
* Setting the property 'writeConfIndex' to true will cause a configuration index to be written.
* By default, the configuration index will be written to
* '${basedir}/target/test-classes/conf-index.conf'. The path to the generated configuration index
* can be customized by setting the 'hbaseConfIndex' property.</p>
*
* @goal start
* @phase pre-integration-test
* @requiresDependencyResolution test
*/
public class StartMojo extends AbstractMojo {
  /**
   * If true, this goal should be a no-op.
   *
   * @parameter property="skip" default-value="false"
   */
  private boolean mSkip;

  /**
   * If true, the Hadoop temporary directory (given by Hadoop configuration property hadoop.tmp
   * .dir) will be cleared before the cluster is started, then copied to the project's build
   * directory before the cluster is shutdown.
   *
   * @parameter property="saveHadoopTmpDir" expression="${save.hadoop.tmp}" default-value="false"
   * @required
   */
  private boolean mSaveHadoopTmpDir;

  /**
   * The file that will store the configuration required to connect to the started mini HBase
   * cluster. This file will be generated by the goal.
   *
   * @parameter property="hbaseSiteFile" expression="${hbase.site.file}" default-value="${project.build.testOutputDirectory}/hbase-site.xml"
   * @required
   */
  private File mHBaseSiteFile;

  /**
   * Whether two separate XML configuration files should be generated
   * (one for HDFS+MapReduce, and one for HBase).
   *
   * @parameter property="separateConfigurationFiles" expression="${separate.configuration.files}" default-value="true"
   */
  private boolean mSeparateConfigurationFiles;

  /**
   * The file that will store the configuration required to connect to the started mini HDFS and
   * MapReduce clusters. This file will be generated by the goal.
   *
   * @parameter property="coreSiteFile" expression="${core.site.file}" default-value="${project.build.testOutputDirectory}/core-site.xml"
   * @required
   */
  private File mCoreSiteFile;

  /**
   * If true, this goal should write an index file that provides the paths to the HBase
   * configuration files written by this goal.
   *
   * @parameter property="writeConfIndex" expression="${hbase.conf.index}" default-value="false"
   */
  private boolean mWriteConfIndex;

  /**
   * The file that will store paths to the configuration files generated by the goal. This file
   * will be generated by the goal and will contain one line for each configuration file giving the
   * path to that configuration file.
   *
   * @parameter property="hbaseConfIndex" expression="${hbase.conf.index.file}" default-value="${project.build.testOutputDirectory}/conf-index.conf"
   */
  private File mHBaseConfIndex;

  /**
   * If true, also start a mini MapReduce cluster.
   *
   * @parameter property="mapReduceEnabled" expression="${mapreduce.enabled}" default-value="false"
   */
  private boolean mIsMapReduceEnabled;

  /**
   * Extra Hadoop configuration properties to use.
   *
   * @parameter property="hadoopConfiguration"
   */
  private Properties mHadoopConfiguration;

  /**
   * A list of this plugin's dependency artifacts.
   *
   * @parameter default-value="${plugin.artifacts}"
   * @required
   * @readonly
   */
  private List<Artifact> mPluginDependencyArtifacts;

  /**
   * The maven project this plugin is running within.
   *
   * @parameter default-value="${project}"
   * @required
   * @readonly
   */
  private MavenProject mMavenProject;

  /**
   * Sets whether this goal should be a no-op.
   *
   * @param skip If true, this goal should do nothing.
   */
  public void setSkip(boolean skip) {
    mSkip = skip;
  }

  /**
   * Sets whether the Hadoop temporary directory, given by hadoop.tmp.dir, should be cleared
   * before the cluster is started and copied to the project build directory before the cluster
   * is shutdown.
   *
   * @param saveTempDir If true, the directory will be copied to the project build directory
   *     before the cluster is shutdown.
   */
  public void setSaveHadoopTmpDir(boolean saveTempDir) {
    mSaveHadoopTmpDir = saveTempDir;
  }

  /**
   * Sets the file that we should write the HBase cluster configuration to.
   *
   * <p>Note: The property "hbaseSiteFile" defined in this mojo means this method must be
   * named setHbaseSiteFile instead of setHBaseSiteFile.</p>
   *
   * @param hbaseSiteFile The file we should write to.
   */
  public void setHbaseSiteFile(File hbaseSiteFile) {
    mHBaseSiteFile = hbaseSiteFile;
  }

  /**
   * Sets whether separate core-site.xml and hbase-site.xml files
   * should be generated (otherwise they are combined into a single
   * hbase-site.xml file).
   *
   * @param separateConfigurationFiles Whether they should be separated.
   */
  public void setSeparateConfigurationFiles(boolean separateConfigurationFiles) {
    mSeparateConfigurationFiles = separateConfigurationFiles;
  }

  /**
   * Sets the file that we should write the MapReduce/HDFS cluster configuration to.
   *
   * @param coreSiteFile The file we should write to.
   */
  public void setCoreSiteFile(File coreSiteFile) {
    mCoreSiteFile = coreSiteFile;
  }

  /**
   * Sets whether this goal should write a configuration index file.
   *
   * @param writeConfIndex True if an index file should be written, false otherwise.
   */
  public void setWriteConfIndex(boolean writeConfIndex) {
    mWriteConfIndex = writeConfIndex;
  }

  /**
   * Sets the file that the HBase configuration index should be written to.
   *
   * <p>Note: The property "hbaseConfIndex" defined in this mojo means this method should be named
   * setHbaseConfIndex.</p>
   *
   * @param hbaseConfIndex The file we should write to.
   */
  public void setHbaseConfIndex(File hbaseConfIndex) {
    mHBaseConfIndex = hbaseConfIndex;
  }

  /**
   * Sets whether we should start a mini MapReduce cluster in addition to the HBase cluster.
   *
   * @param enabled Whether to start a mini MapReduce cluster.
   */
  public void setMapReduceEnabled(boolean enabled) {
    mIsMapReduceEnabled = enabled;
  }

  /**
   * Sets Hadoop configuration properties.
   *
   * @param properties Hadoop configuration properties to use in the mini cluster.
   */
  public void setHadoopConfiguration(Properties properties) {
    mHadoopConfiguration = properties;
  }

  /**
   * Starts a mini HBase cluster in a new thread.
   *
   * <p>This method is called by the maven plugin framework to run the goal.</p>
   *
   * @throws MojoExecutionException If there is a fatal error during this goal's execution.
   */
  @Override
  public void execute() throws MojoExecutionException {
    if (mSkip) {
      getLog().info("Not starting an HBase cluster because skip=true.");
      return;
    }

    // Expose the full dependency set through java.class.path so child JVMs spawned by the
    // mini MapReduce cluster inherit a usable classpath (see getClassPath()).
    System.setProperty("java.class.path", getClassPath());
    getLog().info("Set java.class.path to: " + System.getProperty("java.class.path"));

    // Set any extra hadoop options.
    Configuration conf = new Configuration();
    if (null != mHadoopConfiguration) {
      for (Map.Entry<Object, Object> property : mHadoopConfiguration.entrySet()) {
        String confKey = property.getKey().toString();
        String confValue = property.getValue().toString();
        getLog().info("Setting hadoop conf property '" + confKey + "' to '" + confValue + "'");
        conf.set(confKey, confValue);
      }
    }

    // If necessary, clear the Hadoop tmp dir.
    if (mSaveHadoopTmpDir) {
      removeHadoopTmpDir(conf);
    }

    // Start the cluster.
    try {
      MiniHBaseClusterSingleton.INSTANCE.startAndWaitUntilReady(
          getLog(), mIsMapReduceEnabled, conf);
    } catch (IOException e) {
      throw new MojoExecutionException("Unable to start HBase cluster.", e);
    }

    if (mSeparateConfigurationFiles) {
      // Write separate core-site and hbase-site files.
      writeHBaseSiteFile(conf);
      writeCoreSiteFile(conf);
    } else {
      // Combine the configs into a single hbase-site file.
      writeSiteFile(conf, mHBaseSiteFile);
    }

    // Write the configuration index.
    if (mWriteConfIndex) {
      writeConfigurationIndex();
    }
  }

  /**
   * Deletes the directory given by hadoop.tmp.dir in the specified configuration. The
   * MapReduce cluster started by this plugin will store logs for job tasks in a job-specific
   * directory under hadoop.tmp.dir/userlogs. The
   * {@link org.apache.hadoop.hbase.HBaseTestingUtility} will delete log files on shutdown but
   * not the directory structure, making it hard to locate specific job logs after multiple runs.
   * Clearing hadoop.tmp.dir before the cluster starts again alleviates this problem.
   *
   * @param conf A Hadoop configuration used to determine the value of hadoop.tmp.dir.
   */
  private void removeHadoopTmpDir(Configuration conf) {
    String hadoopTmpPath = conf.get("hadoop.tmp.dir");
    File hadoopTmp = new File(hadoopTmpPath);
    if (hadoopTmp.exists()) {
      getLog().info("Deleting Hadoop tmp dir " + hadoopTmp.toString() + " because it already " +
          "exists.");
      try {
        FileUtils.deleteDirectory(hadoopTmp);
        getLog().info("Successfully deleted Hadoop tmp dir: " + hadoopTmp.toString());
      } catch (IOException e) {
        // Best-effort cleanup: a stale tmp dir is an inconvenience, not a fatal error.
        getLog().warn("An existing Hadoop tmp dir could not be deleted.", e);
      }
    }
  }

  /**
   * Gets the runtime classpath required to run the mini clusters.
   *
   * <p>The maven classloading scheme is nonstandard. They only put the "classworlds" jar
   * on the classpath, and it takes care of ClassLoading the rest of the jars. This a
   * problem if we are going to start a mini MapReduce cluster. The TaskTracker will
   * start a child JVM with the same classpath as this process, and it won't have
   * configured the classworlds class loader. To work around this, we will put all of
   * our dependencies into the java.class.path system property, which will be read by
   * the TaskRunner's child JVM launcher to build the child JVM classpath.</p>
   *
   * <p>Note that when we say "all of our dependencies" we mean both the dependencies of
   * this plugin as well as the test classes and dependencies of the project that is
   * running the plugin. We need to include the latter on the classpath because tests are
   * still just .class files at integration-test-time. There will be no jars available
   * yet to put on the distributed cache via job.setJarByClass(). Hence, all of the
   * test-classes in the project running this plugin need to already be on the classpath
   * of the MapReduce cluster.</p>
   *
   * @return The de-duplicated classpath string, with entries separated by the platform
   *     path separator.
   * @throws MojoExecutionException If the project's test classpath cannot be resolved.
   */
  private String getClassPath() throws MojoExecutionException {
    // Maintain a set of classpath components added so we can de-dupe.
    Set<String> alreadyAddedComponents = new HashSet<String>();

    // Use this to build up the classpath string.
    StringBuilder classpath = new StringBuilder();

    // Add the existing classpath. Use the platform-specific path separator (':' on UNIX,
    // ';' on Windows) rather than a hard-coded ':' so the result is valid everywhere.
    String existingClasspath = System.getProperty("java.class.path");
    classpath.append(existingClasspath);
    alreadyAddedComponents.addAll(
        Arrays.asList(existingClasspath.split(File.pathSeparator)));

    // Add the test classes and dependencies of the maven project running this plugin.
    //
    // Note: It is important that we add these classes and dependencies before we add this
    // plugin's dependencies in case the maven project needs to override a jar version.
    List<?> testClasspathComponents;
    try {
      testClasspathComponents = mMavenProject.getTestClasspathElements();
    } catch (DependencyResolutionRequiredException e) {
      throw new MojoExecutionException("Unable to retrieve project test classpath", e);
    }
    for (Object testClasspathComponent : testClasspathComponents) {
      String dependency = testClasspathComponent.toString();
      if (alreadyAddedComponents.contains(dependency)) {
        continue;
      }
      classpath.append(File.pathSeparator);
      classpath.append(dependency);
      alreadyAddedComponents.add(dependency);
    }

    // Add this plugin's dependencies.
    for (Artifact artifact : mPluginDependencyArtifacts) {
      String dependency = artifact.getFile().getPath();
      if (alreadyAddedComponents.contains(dependency)) {
        continue;
      }
      classpath.append(File.pathSeparator);
      classpath.append(dependency);
      alreadyAddedComponents.add(dependency);
    }
    return classpath.toString();
  }

  /**
   * Writes the HBase-specific contents of the specified configuration to the HBase site file.
   *
   * @param conf The configuration to write.
   * @throws MojoExecutionException If there is an error writing the file.
   */
  private void writeHBaseSiteFile(Configuration conf) throws MojoExecutionException {
    writeSiteFile(getHBaseOnlyConfiguration(conf), mHBaseSiteFile);
  }

  /**
   * Writes the MapReduce/HDFS-specific contents of the specified configuration to the core
   * site file.
   *
   * @param conf The configuration to write.
   * @throws MojoExecutionException If there is an error writing the file.
   */
  private void writeCoreSiteFile(Configuration conf) throws MojoExecutionException {
    writeSiteFile(getMapReduceOnlyConfiguration(conf), mCoreSiteFile);
  }

  /**
   * Writes the specified configuration to the specified file.
   *
   * @param conf The configuration to write.
   * @param siteFile The file to write the configuration to.
   * @throws MojoExecutionException If there is an error writing the file.
   */
  private void writeSiteFile(Configuration conf, File siteFile) throws MojoExecutionException {
    // Create the parent directory for the site file if it does not already exist.
    createFileParentDir(siteFile);

    // Write the file.
    FileOutputStream fileOutputStream = null;
    try {
      fileOutputStream = new FileOutputStream(siteFile);
      conf.writeXml(fileOutputStream);
    } catch (IOException e) {
      throw new MojoExecutionException(
          "Unable to write to site file: " + siteFile.getPath(), e);
    } finally {
      closeFileOutputStream(fileOutputStream);
    }
    getLog().info("Wrote " + siteFile.getPath() + ".");
  }

  /**
   * Gets a new configuration created from the specified configuration, including only HBase
   * configuration variables.
   *
   * @param conf The configuration to filter.
   * @return A new configuration containing copies of the appropriate configuration variables.
   */
  private Configuration getHBaseOnlyConfiguration(Configuration conf) {
    return getFilteredConfiguration(conf, true);
  }

  /**
   * Gets a new configuration created from the specified configuration, including only
   * MapReduce/HDFS configuration variables.
   *
   * @param conf The configuration to filter.
   * @return A new configuration containing copies of the appropriate configuration variables.
   */
  private Configuration getMapReduceOnlyConfiguration(Configuration conf) {
    return getFilteredConfiguration(conf, false);
  }

  /**
   * Gets a new configuration created from the specified configuration,
   * including only MapReduce/HDFS configuration variables or HBase only configuration variables.
   *
   * @param conf The configuration to filter.
   * @param hBaseOnly <code>true</code> if only HBase configuration variables should be included,
   *     <code>false</code> if only MapReduce/HDFS configuration variables should be included.
   * @return A new configuration with copies of the appropriate configuration variables.
   */
  private Configuration getFilteredConfiguration(Configuration conf, boolean hBaseOnly) {
    // Pass false to avoid loading the default resources; we only want the filtered entries.
    Configuration filteredConf = new Configuration(false);
    for (Map.Entry<String, String> entry: conf) {
      boolean startsWithHBase = entry.getKey().startsWith("hbase");
      if (startsWithHBase == hBaseOnly) {
        filteredConf.set(entry.getKey(), entry.getValue());
      }
    }
    return filteredConf;
  }

  /**
   * Writes a configuration index containing one line for each configuration file written by
   * this goal, where each line is the path to that configuration file.
   *
   * @throws MojoExecutionException If there is an error writing the configuration file.
   */
  private void writeConfigurationIndex() throws MojoExecutionException {
    // Create the parent directory of the file we are writing.
    createFileParentDir(mHBaseConfIndex);

    // Write the file.
    FileOutputStream fileOutputStream = null;
    PrintWriter fileWriter = null;
    try {
      fileOutputStream = new FileOutputStream(mHBaseConfIndex);
      fileWriter = new PrintWriter(fileOutputStream);
      fileWriter.println(mHBaseSiteFile.getPath());
      if (mSeparateConfigurationFiles) {
        // A separate core-site file was also written; record it in the index so the index
        // lists every configuration file generated by this goal.
        fileWriter.println(mCoreSiteFile.getPath());
      }
      // PrintWriter swallows IOExceptions; surface any write failure explicitly.
      if (fileWriter.checkError()) {
        throw new MojoExecutionException(
            "Error writing to configuration index file: " + mHBaseConfIndex.getPath());
      }
    } catch (IOException e) {
      throw new MojoExecutionException(
          "Unable to write to configuration index file: " + mHBaseConfIndex.getPath(), e);
    } finally {
      if (null != fileWriter) {
        fileWriter.close();
      }
      closeFileOutputStream(fileOutputStream);
    }
    getLog().info("Wrote " + mHBaseConfIndex.getPath() + ".");
  }

  /**
   * Gets the parent directory of the specified file. Creates the directory if it does not already
   * exist.
   *
   * @param file The file whose parent directory should be created if necessary.
   * @return The parent directory, or <code>null</code> if the file has no parent.
   * @throws MojoExecutionException If there is an error getting or creating the parent directory.
   */
  private static File createFileParentDir(File file) throws MojoExecutionException {
    File parentDir = file.getParentFile();
    if (null != parentDir && !parentDir.exists() && !parentDir.mkdirs()) {
      throw new MojoExecutionException(
          "Unable to create or access parent directory of: "
          + file.getParent());
    }
    return parentDir;
  }

  /**
   * Closes the specified FileOutputStream. The specified stream may be null, in which case this
   * operation is a no-op.
   *
   * @param stream The stream to close; may be <code>null</code>.
   * @throws MojoExecutionException If there is an error closing the stream.
   */
  private static void closeFileOutputStream(FileOutputStream stream) throws MojoExecutionException {
    if (null != stream) {
      try {
        stream.close();
      } catch (IOException e) {
        throw new MojoExecutionException("Unable to close file stream.", e);
      }
    }
  }
}